########################################################################################################################
# "Is Social Policy Popularly Demanded?"
# Alexander Hertel-Fernandez and Konstantin Kashin 
# April 2011
# Macro-Level Analysis
########################################################################################################################
########################################################################################################################
# This code replicates the main macro-level findings that we present in our paper. 
########################################################################################################################



library(xtable)
library(foreign)
library(Zelig)
library(pcse)
library(plm)
library(car)
library(lmtest)
library(lattice)
library(trellis)
getwd()

##########################################################################################

load("AHKK_macrodata.Rdata")
macrodata <- read.dta("AHKK_macrodata.dta")
head(macrodata)


### Truncate sample to 1991-2007 for pooled and collapsed analysis
macrodata.1991.2007 <- macrodata[macrodata$year >=1991 & macrodata$year <=2007,]
head(macrodata.1991.2007)
tail(macrodata.1991.2007)

## Load state index
state.index <- read.csv("state_index.csv")
state.index$stabrev <- as.character(as.vector(state.index$stabrev))
state.index$stname <- as.character(as.vector(state.index$stname))
state.index$stnameCAP <- as.character(as.vector(state.index$stnameCAP))
state.index[state.index$stabrev=="DC",4] <- "District of Columbia"




##########################################################################################
### Run Descriptive Statistics
##########################################################################################

# We are going to drop Nebraska and DC because we do not have any political statistics on them and exclude them from our subsequent macro-level analysis 

descriptive.data <- macrodata.1991.2007[macrodata.1991.2007$stabrev!="NE" & macrodata.1991.2007$stabrev!="DC",]


# Get basic descriptive statistics (mean, max, min, sd, etc.) for each of main variables

unempben.sum <- summary(descriptive.data$unempben)
unempben.sd <- sd(descriptive.data$unempben)
unempgini.sum <- summary(descriptive.data$unemp_gini)
unempgini.sd <- sd(descriptive.data$unemp_gini)
famincgini.sum <- summary(descriptive.data$faminc_gini)
famincgini.sd <- sd(summary(descriptive.data$faminc_gini))
meddurgini.sum <- summary(descriptive.data$median_dur)
meddurgini.sd <- sd(descriptive.data$median_dur)
medwage.sum <- summary(descriptive.data$med_wage)
medwage.sd <- sd(descriptive.data$med_wage)
unionshare.sum <- summary(descriptive.data$unionshare)
unionshare.sd <- sd(descriptive.data$unionshare)
blackshare.sum <- summary(descriptive.data$blackshare)
blackshare.sd <- sd(descriptive.data$blackshare)
stateunemp.sum <- summary(descriptive.data$stateunemp)
stateunemp.sd <- sd(descriptive.data$stateunemp)
highskill.sum <- summary(descriptive.data$highskillshare)
highskill.sd <- sd(descriptive.data$highskillshare)
govparty.sum <- summary(descriptive.data$govparty)
govparty.sd <- sd(descriptive.data$govparty)
aupcont.sum <- summary(descriptive.data$aupcont)
aupcont.sd <- sd(descriptive.data$aupcont)

descriptives <- rbind(unempben.sum,unempgini.sum,famincgini.sum,meddurgini.sum,medwage.sum,unionshare.sum,blackshare.sum,stateunemp.sum,highskill.sum,govparty.sum,aupcont.sum)

descriptives.sd <- c(unempben.sd,unempgini.sd,famincgini.sd,meddurgini.sd,medwage.sd,unionshare.sd,blackshare.sd,stateunemp.sd,highskill.sd,govparty.sd,aupcont.sd)

desc.table <- cbind(descriptives, descriptives.sd)
xtable(desc.table)


##########################################################################################
### Pooled specification: run basic OLS on data pooled across states for 1991-2007; bivariate regressions where outcome variable is UI generosity and the explanatory variable is a different measure of risk in each case
##########################################################################################

# Unemployment Risk Inequality as I.V.
lm1 <- lm(unempben~ unemp_gini, data=macrodata.1991.2007)
summary(lm1)

# Unemployed to Employed Family Income Inequality as I.V.
lm2 <- lm(unempben~ faminc_gini,data=macrodata.1991.2007)
summary(lm2)

# Mean Duration of Unemloyment Inequality as I.V.
lm3 <- lm(unempben~ meandur_gini, data=macrodata.1991.2007)
summary(lm3)

# Median Duration of Unemloyment Inequality as I.V.
lm4 <- lm(unempben~ median_dur, data=macrodata.1991.2007)
summary(lm4)


##########################################################################################
### Basic plots over time by state
##########################################################################################

# Unemployment insurance (UI) generosity across time by state
xyplot(unempben~ year|stabrev, data=macrodata, main="Unemployment Benefits Generosity from 1980-2008 by State",ylab="Unemployment Benefits Generosity", xlab="Year",pch=20, col="navy",
panel=function(x, y){
	panel.xyplot(x, y, type="l", col="navy") 
	}
)

# Unemployment Risk Inequality Gini across time by state
xyplot(unemp_gini~ year|stabrev, data=macrodata, main="Unemployment Gini from 1980-2008 by State",ylab="Unemployment Gini", xlab="Year",pch=20, col="navy",
panel=function(x, y){
	panel.xyplot(x, y, type="l", col="navy") 
	}
)


# Family income ratio gini across time by state
xyplot(faminc_gini~ year|stabrev, data=macrodata, main="Unemployment Gini from 1980-2008 by State",ylab="Unemployment Gini", xlab="Year",pch=20, col="navy",
panel=function(x, y){
	panel.xyplot(x, y, type="l", col="navy") 
	}
)


# Unemployment mean duration gini across time by state
xyplot(meandur_gini~ year|stabrev, data=macrodata, main="Mean Unemployment Duration Gini from 1980-2008 by State",ylab="Mean Unemployment Duration Gini", xlab="Year",pch=20, col="navy",
panel=function(x, y){
	panel.xyplot(x, y, type="l", col="navy") 
	}
)



# Unemployment median duration gini across time by state
xyplot(median_dur~ year|stabrev, data=macrodata, main="Median Unemployment Duration Gini from 1980-2008 by State",ylab="Median Unemployment Duration Gini", xlab="Year",pch=20, col="navy",
panel=function(x, y){
	panel.xyplot(x, y, type="l", col="navy") 
	}
)


# unemployment benefits generosity vs. unemployment gini by state
xyplot(unempben~ unemp_gini|stabrev, data=macrodata, main="Unemployment Gini vs. Unemployment Generosity by State",ylab="Unemployment Benefits Generosity", xlab="Unemployment Gini",pch=20, col="navy",
panel=function(x, y){
	panel.xyplot(x, y, type="p", col="navy", cex=.75, pch=21) 
	#panel.loess(x, y, span=1) 
	#panel.lmline(x, y, lty=2, col="red")
	}
)




##########################################################################################
### Cross-sectional specification: collapse data by state using averages of variables and
### run OLS
##########################################################################################

state.collapsed <- by(macrodata.1991.2007,macrodata.1991.2007$stname, function(subset) mean(subset[,-c(1,2,4)], na.rm=TRUE))
state.collapsed <- as.data.frame(do.call("rbind", state.collapsed))
head(state.collapsed)
# note: this doesn't work for factor variables, so will get warnings; however, it works fine for variables of interest


# State with median welfare generosity is Michigan (statefip is 26)
state.collapsed[state.collapsed$unempben==median(state.collapsed$unempben),]


# let's relabel the states with abbreviations using the state index we imported (we do this because state abbreviations do not transfer when collapsing)
state.collapsed$stabrev <- rep(NA, nrow(state.collapsed))
for(i in 1:nrow(state.collapsed)){
	state.collapsed$stabrev[i] <- state.index[state.index$FIPS==state.collapsed$statefip[i],]$stabrev
	}


# Scatterplot of unemployment benefits and unemployment gini
plot(state.collapsed$unemp_gini, state.collapsed$unempben, cex=0, xlab="Unemployment Gini", ylab="Unemployment Benefit Generosity", main="Unemployment Gini vs. Unemployment Insurance Generosity by State, 1980-2008", cex.main=.97)
text(state.collapsed$unemp_gini, state.collapsed$unempben, labels=state.collapsed$stabrev, col="navy", main="")



# Scatterplot of unemployment benefits and unemployment gini
plot(state.collapsed$highskillshare, state.collapsed$unempben, cex=0, xlab="Unemployment Gini", ylab="Unemployment Benefit Generosity", main="Unemployment Gini vs. Unemployment Insurance Generosity by State, 1980-2008", cex.main=.97)
text(state.collapsed$highskillshare, state.collapsed$unempben, labels=state.collapsed$stabrev, col="navy", main="")




##############################################################################

### Model 1: Univariate Regressions

lm.collapse.1a <- lm(unempben ~ unemp_gini,data=state.collapsed)
summary(lm.collapse.1a)

lm.collapse.1b <- lm(unempben ~ faminc_gini,data=state.collapsed)
summary(lm.collapse.1b)

lm.collapse.1c <- lm(unempben ~ meandur_gini,data=state.collapsed)
summary(lm.collapse.1c)

lm.collapse.1d <- lm(unempben ~ median_dur,data=state.collapsed)
summary(lm.collapse.1d)



### Model 2: Control for Demographics

lm.collapse.2a <- lm(unempben ~ unemp_gini + urbanshare + blackshare + ba_share,data=state.collapsed)
summary(lm.collapse.2a)

lm.collapse.2b <- lm(unempben ~ faminc_gini + urbanshare + blackshare + ba_share,data=state.collapsed)
summary(lm.collapse.2b)

lm.collapse.2c <- lm(unempben ~ meandur_gini + urbanshare + blackshare + ba_share,data=state.collapsed)
summary(lm.collapse.2c)

lm.collapse.2d <- lm(unempben ~ median_dur + urbanshare + blackshare + ba_share,data=state.collapsed)
summary(lm.collapse.2d)


### Model 3: Control for Demographic & Economic Variables
# control for median wage (collinear with share of poor so never run together)
# control for level of unemployment
# do not include share of poor since poissonst-treatment

lm.collapse.3a <- lm(unempben ~ unemp_gini + urbanshare + blackshare + ba_share + med_wage + unionshare + stateunemp,data=state.collapsed)
summary(lm.collapse.3a)

lm.collapse.3b <- lm(unempben ~ faminc_gini + urbanshare + blackshare + ba_share + med_wage + unionshare + stateunemp,data=state.collapsed)
summary(lm.collapse.3b)

lm.collapse.3c <- lm(unempben ~ meandur_gini + urbanshare + blackshare + ba_share + med_wage + unionshare + stateunemp,data=state.collapsed)
summary(lm.collapse.3c)

lm.collapse.3d <- lm(unempben ~ median_dur + urbanshare + blackshare + ba_share + med_wage + unionshare + stateunemp,data=state.collapsed)
summary(lm.collapse.3d)


### Model 5: Control for Skills
lm.collapse.5a <- lm(unempben ~  blackshare + log(med_wage) + unionshare + stateunemp + highskillshare +
I(highskillshare^2) + upcont + govparty,data=state.collapsed)
summary(lm.collapse.5a)


### Control for Skills - REMOVE RISK
lm.collapse.5b <- lm(unempben ~ blackshare + log(med_wage) + unionshare + stateunemp + highskillshare + I(highskillshare^2)+ upcont + govparty,data=state.collapsed)
summary(lm.collapse.5b)




######################################################################################
##########################################################################################
### Panel Data
##########################################################################################
######################################################################################

### Set structure of dataset to panel; N is indexed with "stname" and T is indexed with "year"
macro.panel <- pdata.frame(macrodata, index=c("stname", "year"))
head(macro.panel)

macro.panel$time <- as.numeric(as.vector(macro.panel$year))
groupID <- unique(macro.panel$statefip)
##########################################################################################




######################################################################################
### Skills as Main Explanatory Variable ###
######################################################################################

# Begin with autoregressive distributed lags (ADL) model (LDV and AR(1) specifications are nested within this more encompassing model)
plm.skills.1 <- plm(unempben ~ lag(unempben, 1) + lag(highskillshare, 0:1) + lag(I(highskillshare^2), 0:1) + lag(stateunemp, 0:1) + lag(blackshare, 0:1) + lag(log(med_wage), 0:1) + lag(unionshare, 0:1) + lag(aupcont, 0:1) + lag(govparty, 0:1) + time, data=macro.panel, method="within")

# Durbin-Watson test for autocorrelation; autocorrelation detected since null rejected #
pdwtest(plm.skills.1)
pbgtest(plm.skills.1, order=1)

# Add another lag of dependent variable, as recommended by Beck and Katz (2011)
# ADLLDV2 model
plm.skills.2 <- plm(unempben ~ lag(unempben, 1:2) + lag(highskillshare, 0:1) + lag(I(highskillshare^2), 0:1) + lag(stateunemp, 0:1) + lag(blackshare, 0:1) + lag(log(med_wage), 0:1) + lag(unionshare, 0:1) + lag(aupcont, 0:1) + lag(govparty, 0:1) + time, data=macro.panel, method="within")
summary(plm.skills.2)

# Durbin-Watson test for autocorrelation; autocorrelation not detected in residuals since null not rejected #
pdwtest(plm.skills.2)

# Verify lack of autocorrelation with Breusch-Godfrey/Wooldridge test
pbgtest(plm.skills.2, order=1)


# Wald test (F-test) to see if coefficients on lagged explanatory variables might 0, in which case ADLLDV2 model will simplify to LDV2 model

linearHypothesis(plm.skills.2, c("lag(highskillshare, 0:1)1", "lag(I(highskillshare^2), 0:1)1", "lag(stateunemp, 0:1)1", "lag(blackshare, 0:1)1", "lag(log(med_wage), 0:1)1", "lag(unionshare, 0:1)1", "lag(aupcont, 0:1)1", "lag(govparty, 0:1)1"), rep(0,8), test=c("Chisq", "F"))

# F-test on unlagged explanatory variables
linearHypothesis(plm.skills.2, c("lag(highskillshare, 0:1)0", "lag(I(highskillshare^2), 0:1)0", "lag(stateunemp, 0:1)0", "lag(blackshare, 0:1)0", "lag(log(med_wage), 0:1)0", "lag(unionshare, 0:1)0", "lag(aupcont, 0:1)0", "lag(govparty, 0:1)0"), rep(0,8), test=c("Chisq", "F"))


# Create function that exports data run in plm to a matrix that can then be run in lm
# Note that panels must be balanced and the data should be ordered by panel then time
y.export <- function(plm.obj, npanel, ntime){
	unitID <- NULL
	for(i in 1:npanel){
		unitID <- append(unitID, rep(as.numeric(i), ntime))
		}
	response <- cbind(as.vector(plm.obj$model[,1]), unitID)
	response.demean <- stack(by(response, response[,2], function(subset) subset[,1]-mean(subset[,1])))[,1]
	return(response.demean)
	}
	
skills.2.MM <- as.data.frame(model.matrix(plm.skills.2))
skills.2.demean.y <- y.export(plm.skills.2, npanel=49, ntime=17)



# Panel bootstrapped SE (see Bischof 2009)

# Define function for data into list form; each panel is demeaned (since FE model) and stored as a separate element in the list
data.to.list <- function(plm.obj, npanel, ntime){
	unitID <- NULL
	for(i in 1:npanel){
		unitID <- append(unitID, rep(as.numeric(i), ntime))
		}
	response <- cbind(as.vector(plm.obj$model[,1]), unitID)
	response.demean <- stack(by(response, response[,2], function(subset) subset[,1]-mean(subset[,1])))[,1]
	data.ID <- as.data.frame(cbind(response.demean, model.matrix(plm.obj)))
	split.data <- split(data.ID, unitID)
	return(split.data)
	}

# Run function on model from skills 2
plm.skills.2.list <- data.to.list(plm.skills.2, npanel=49, ntime=17)

# Define panel sample function: samples N out of N panels with replacement and runs OLS on ADLLDV2 model
panel.boot <- function(split.MM, npanel){
	index <- sample(1:npanel, size=npanel, replace=TRUE)
	mm.boot <- do.call("rbind", split.MM[index])[,-1]
	response.boot <- do.call("rbind", split.MM[index])[,1]
	lm.boot <- lm(response.boot~., data=mm.boot)
	return(coefficients(lm.boot))
	}

# Run bootstrap 1000 times
set.seed(01238)
plm.skills.2.boot <- replicate(1000, panel.boot(plm.skills.2.list, npanel=49))


# Calculate bootstrapped standard errors
plm.skills.2.boot.se <- apply(plm.skills.2.boot, MARGIN=1, FUN=sd)
plm.skills.2.boot.se.out <- xtable(as.matrix(plm.skills.2.boot.se))
digits(plm.skills.2.boot.se.out) <- 4
plm.skills.2.boot.se.out




#### Regression on undemeaned data using fixed effects ####


# let's generate the unit and time labels needed to do so
# we have 17 time observations #
unique.time <- 1991:2007 
groupT <- rep(1991:2007, 49)

# we have 49 cross-sectional units; we omit Nebraska (statefip=33) due to the fact that political variables do not apply to it and Washington DC (statefip=11) since we don't have variables for it (and doesn't make sense to impute)#
groupID.adj <- groupID[groupID!=33 & groupID!=11]
length(groupID.adj)

groupN <- c()
for(i in 1:length(groupID.adj)){
	groupN <- append(groupN, rep(groupID.adj[i],length(unique.time) ) )
	}

data.skills.2.non.demeaned <- cbind(as.data.frame(plm.skills.2$model), as.factor(groupN))
colnames(data.skills.2.non.demeaned) <- c("unempben", "L1.unempben", "L2.unempben", "highskillshare", "L1.highskillshare", "highskillsq", "L1.highskillsq", "stateunemp", "L1.stateunemp", "blackshare", "L1.blackshare", "logmedwage", "L1.logmedwage", "unionshare", "L1.unionshare", "aupcont", "L1.aupcont", "govparty", "L1.govparty", "time", "fip")
skills.2.y.nondemean <- data.skills.2.non.demeaned[,1]
skills.2.MM.nondemean <-data.skills.2.non.demeaned[,-1] 
lm.skills.2.nondemean <- lm(skills.2.y.nondemean ~.-1, data=skills.2.MM.nondemean)
summary(lm.skills.2.nondemean)
xtable(summary(lm.skills.2.nondemean))



##### IMPULSE RESPONSE FUNCTION #####

set.seed(02138)
beta.skills.2.draws <- mvrnorm(10000, mu = coefficients(lm.skills.2.nondemean), Sigma = vcov(lm.skills.2.nondemean))



### IRF with minimum equilibrium value value of high skill share
initial.x <-  summary(data.skills.2.non.demeaned$highskillshare)["Min."]
sd.skill <- sd(data.skills.2.non.demeaned$highskillshare)

# marginal response at time t=0
marginal.0 <- (beta.skills.2.draws[,4]+2*beta.skills.2.draws[,6]*initial.x)*sd.skill

# decay for time t=1
theta <- beta.skills.2.draws[,2]
marginal.1 <- sd.skill*(theta*marginal.0 + beta.skills.2.draws[,5]+beta.skills.2.draws[,7]*initial.x)
  
# decay for time=2
gamma <- beta.skills.2.draws[,3]
marginal.2 <- (theta*(marginal.1) +gamma*marginal.0)*sd.skill

# decay for time=3
marginal.3 <- (theta*(marginal.2) +gamma*marginal.1)*sd.skill

# decay for time=4
marginal.4 <- (theta*(marginal.3) +gamma*marginal.2)*sd.skill

# decay for time=5
marginal.5 <- (theta*(marginal.4) +gamma*marginal.3)*sd.skill

marginal.effect <- cbind(marginal.0, marginal.1, marginal.2, marginal.3, marginal.4, marginal.5)
plot(x=0:5, apply(marginal.effect, 2, FUN=mean), type="l", col="navy", ylim=c(-.01, .05), xlab="Time", ylab="Marginal Effect on UI Generosity", main="Minimum Equilibrium Value of Skill Share")
points(x=0:5, apply(marginal.effect, 2, FUN=quantile, .95), type="l", lty=2, col="navy")
points(x=0:5, apply(marginal.effect, 2, FUN=quantile, .05), type="l", lty=2, col="navy")
abline(h=0, lty=2, col="red")





### IRF with 25% percentile equilibrium value value of high skill share

summary(data.skills.2.non.demeaned$L1.highskillshare)["1st Qu."]

initial.x <-  summary(data.skills.2.non.demeaned$highskillshare)["1st Qu."]
sd.skill <- sd(data.skills.2.non.demeaned$highskillshare)

# marginal response at time t=0
marginal.0 <- (beta.skills.2.draws[,4]+2*beta.skills.2.draws[,6]*initial.x)*sd.skill

# decay for time t=1
theta <- beta.skills.2.draws[,2]
marginal.1 <- sd.skill*(theta*marginal.0 + beta.skills.2.draws[,5]+beta.skills.2.draws[,7]*initial.x)
 
# decay for time=2
gamma <- beta.skills.2.draws[,3]
marginal.2 <- (theta*(marginal.1) +gamma*marginal.0)*sd.skill

# decay for time=3
marginal.3 <- (theta*(marginal.2) +gamma*marginal.1)*sd.skill

# decay for time=4
marginal.4 <- (theta*(marginal.3) +gamma*marginal.2)*sd.skill

# decay for time=5
marginal.5 <- (theta*(marginal.4) +gamma*marginal.3)*sd.skill

marginal.effect <- cbind(marginal.0, marginal.1, marginal.2, marginal.3, marginal.4, marginal.5)
plot(x=0:5, apply(marginal.effect, 2, FUN=mean), type="l", col="navy", ylim=c(-.01, .05), xlab="Time", ylab="Marginal Effect on UI Generosity", main="1st Quartile Equilibrium Value of Skill Share")
points(x=0:5, apply(marginal.effect, 2, FUN=quantile, .95), type="l", lty=2, col="navy")
points(x=0:5, apply(marginal.effect, 2, FUN=quantile, .05), type="l", lty=2, col="navy")
abline(h=0, lty=2, col="red")




### IRF with median equilibrium value value of high skill share

initial.x <-  summary(data.skills.2.non.demeaned$highskillshare)["Median"]
sd.skill <- sd(data.skills.2.non.demeaned$highskillshare)

# marginal response at time t=0
marginal.0 <- (beta.skills.2.draws[,4]+2*beta.skills.2.draws[,6]*initial.x)*sd.skill

# decay for time t=1
theta <- beta.skills.2.draws[,2]
marginal.1 <- sd.skill*(theta*marginal.0 + beta.skills.2.draws[,5]+beta.skills.2.draws[,7]*initial.x)
 
# decay for time=2
gamma <- beta.skills.2.draws[,3]
marginal.2 <- (theta*(marginal.1) +gamma*marginal.0)*sd.skill

# decay for time=3
marginal.3 <- (theta*(marginal.2) +gamma*marginal.1)*sd.skill

# decay for time=4
marginal.4 <- (theta*(marginal.3) +gamma*marginal.2)*sd.skill

# decay for time=5
marginal.5 <- (theta*(marginal.4) +gamma*marginal.3)*sd.skill

marginal.effect <- cbind(marginal.0, marginal.1, marginal.2, marginal.3, marginal.4, marginal.5)
plot(x=0:5, apply(marginal.effect, 2, FUN=mean), type="l", col="navy", ylim=c(-.01, .05), xlab="Time", ylab="Marginal Effect on UI Generosity", main="Median Equilibrium Value of Skill Share")
points(x=0:5, apply(marginal.effect, 2, FUN=quantile, .95), type="l", lty=2, col="navy")
points(x=0:5, apply(marginal.effect, 2, FUN=quantile, .05), type="l", lty=2, col="navy")
abline(h=0, lty=2, col="red")



### IRF with 75% percentile equilibrium value value of high skill share

summary(data.skills.2.non.demeaned$L1.highskillshare)["3rd Qu."]

initial.x <-  summary(data.skills.2.non.demeaned$highskillshare)["3rd Qu."]
sd.skill <- sd(data.skills.2.non.demeaned$highskillshare)

# marginal response at time t=0
marginal.0 <- (beta.skills.2.draws[,4]+2*beta.skills.2.draws[,6]*initial.x)*sd.skill

# decay for time t=1
theta <- beta.skills.2.draws[,2]
marginal.1 <- sd.skill*(theta*marginal.0 + beta.skills.2.draws[,5]+beta.skills.2.draws[,7]*initial.x)
 
# decay for time=2
gamma <- beta.skills.2.draws[,3]
marginal.2 <- (theta*(marginal.1) +gamma*marginal.0)*sd.skill

# decay for time=3
marginal.3 <- (theta*(marginal.2) +gamma*marginal.1)*sd.skill

# decay for time=4
marginal.4 <- (theta*(marginal.3) +gamma*marginal.2)*sd.skill

# decay for time=5
marginal.5 <- (theta*(marginal.4) +gamma*marginal.3)*sd.skill

marginal.effect <- cbind(marginal.0, marginal.1, marginal.2, marginal.3, marginal.4, marginal.5)
plot(x=0:5, apply(marginal.effect, 2, FUN=mean), type="l", col="navy", ylim=c(-.01, .05), xlab="Time", ylab="Marginal Effect on UI Generosity", main="3rd Quartile Equilibrium Value of Skill Share")
points(x=0:5, apply(marginal.effect, 2, FUN=quantile, .95), type="l", lty=2, col="navy")
points(x=0:5, apply(marginal.effect, 2, FUN=quantile, .05), type="l", lty=2, col="navy")
abline(h=0, lty=2, col="red")








######################################################################################
### Unemployment Risk Gini as Main Explanatory Variable ###
######################################################################################


# Begin with autoregressive distributed lags (ADL) model (LDV and AR(1) specifications are nested within this more encompassing model)
plm.unemp.1 <- plm(unempben ~ lag(unempben, 1) + lag(unemp_gini, 0:1) + lag(I(unemp_gini^2), 0:1) + lag(stateunemp, 0:1) + lag(blackshare, 0:1) + lag(log(med_wage), 0:1) + lag(unionshare, 0:1) + lag(aupcont, 0:1) + lag(govparty, 0:1) + time, data=macro.panel, method="within")

# Durbin-Watson test for autocorrelation; autocorrelation detected since null rejected #
pdwtest(plm.unemp.1)
pbgtest(plm.unemp.1, order=1)

# Add another lag of dependent variable, as recommended by Beck and Katz (2011)
# ADLLDV2 model
plm.unemp.2 <- plm(unempben ~ lag(unempben, 1:2) + lag(unemp_gini, 0:1) + lag(I(unemp_gini^2), 0:1) + lag(stateunemp, 0:1) + lag(blackshare, 0:1) + lag(log(med_wage), 0:1) + lag(unionshare, 0:1) + lag(aupcont, 0:1) + lag(govparty, 0:1) + time, data=macro.panel, method="within")
class(summary(plm.unemp.2))

coef.out <- xtable(summary(plm.unemp.2)$coefficients, digits=4)
coef.out


# Durbin-Watson test for autocorrelation; autocorrelation not detected in residuals since null not rejected #
pdwtest(plm.unemp.2)

# Verify lack of autocorrelation with Breusch-Godfrey/Wooldridge test
pbgtest(plm.unemp.2, order=1)


# Wald test (F-test) to see if coefficients on lagged explanatory variables might 0, in which case ADLLDV2 model will simplify to LDV2 model

linearHypothesis(plm.unemp.2, c("lag(unemp_gini, 0:1)1", "lag(I(unemp_gini^2), 0:1)1", "lag(stateunemp, 0:1)1", "lag(blackshare, 0:1)1", "lag(log(med_wage), 0:1)1", "lag(unionshare, 0:1)1", "lag(aupcont, 0:1)1", "lag(govparty, 0:1)1"), rep(0,8), test=c("Chisq", "F"))

# F-test on unlagged explanatory variables
linearHypothesis(plm.unemp.2, c("lag(unemp_gini, 0:1)0", "lag(I(unemp_gini^2), 0:1)0", "lag(stateunemp, 0:1)0", "lag(blackshare, 0:1)0", "lag(log(med_wage), 0:1)0", "lag(unionshare, 0:1)0", "lag(aupcont, 0:1)0", "lag(govparty, 0:1)0"), rep(0,8), test=c("Chisq", "F"))



# Panel bootstrapped SE (see Bischof 2009)

# Define function for data into list form; each panel is demeaned (since FE model) and stored as a separate element in the list
data.to.list <- function(plm.obj, npanel, ntime){
	unitID <- NULL
	for(i in 1:npanel){
		unitID <- append(unitID, rep(as.numeric(i), ntime))
		}
	response <- cbind(as.vector(plm.obj$model[,1]), unitID)
	response.demean <- stack(by(response, response[,2], function(subset) subset[,1]-mean(subset[,1])))[,1]
	data.ID <- as.data.frame(cbind(response.demean, model.matrix(plm.obj)))
	split.data <- split(data.ID, unitID)
	return(split.data)
	}

# Run function on model from skills 2
plm.unemp.2.list <- data.to.list(plm.unemp.2, npanel=49, ntime=17)

# Define panel sample function: samples N out of N panels with replacement and runs OLS on ADLLDV2 model
panel.boot <- function(split.MM, npanel){
	index <- sample(1:npanel, size=npanel, replace=TRUE)
	mm.boot <- do.call("rbind", split.MM[index])[,-1]
	response.boot <- do.call("rbind", split.MM[index])[,1]
	lm.boot <- lm(response.boot~., data=mm.boot)
	return(coefficients(lm.boot))
	}

# Run bootstrap 1000 times
set.seed(01238)
plm.unemp.2.boot <- replicate(1000, panel.boot(plm.unemp.2.list, npanel=49))


# Calculate bootstrapped standard errors standard errors
plm.unemp.2.boot.se <- apply(plm.unemp.2.boot, MARGIN=1, FUN=sd)
plm.unemp.2.boot.se.out <- xtable(as.matrix(plm.unemp.2.boot.se))
digits(plm.unemp.2.boot.se.out) <- 4
plm.unemp.2.boot.se.out



# Run regression on undemeaned data by using fixed effects (state dummies) instead #

# let's generate the unit and time labels we need to get undemeaned data
# we have 17 time observations #
unique.time <- 1991:2007 
groupT <- rep(1991:2007, 49)

# we have 49 cross-sectional units; we omit Nebraska (statefip=33) due to the fact that political variables do not apply to it and Washington DC (statefip=11) since we don't have variables for it (and doesn't make sense to impute)#
groupID.adj <- groupID[groupID!=33 & groupID!=11]
length(groupID.adj)

groupN <- c()
for(i in 1:length(groupID.adj)){
	groupN <- append(groupN, rep(groupID.adj[i],length(unique.time) ) )
	}

data.unemp.2.non.demeaned <- cbind(as.data.frame(plm.unemp.2$model), as.factor(groupN))
colnames(data.unemp.2.non.demeaned) <- c("unempben", "L1.unempben", "L2.unempben", "unempgini", "L1.unempgini", "unempginisq", "L1.unempginisq", "stateunemp", "L1.stateunemp", "blackshare", "L1.blackshare", "logmedwage", "L1.logmedwage", "unionshare", "L1.unionshare", "aupcont", "L1.aupcont", "govparty", "L1.govparty", "time", "fip")
unemp.2.y.nondemean <- data.unemp.2.non.demeaned[,1]
unemp.2.MM.nondemean <-data.unemp.2.non.demeaned[,-1] 
lm.unemp.2.nondemean <- lm(unemp.2.y.nondemean ~., data=unemp.2.MM.nondemean)
summary(lm.unemp.2.nondemean)




###########################################
##### IMPULSE RESPONSE FUNCTION #####
###########################################

set.seed(02138)
beta.unemp.2.draws <- mvrnorm(10000, mu = coefficients(lm.unemp.2.nondemean), Sigma = vcov(lm.unemp.2.nondemean))


# IRF for minimum equilibrium value of unemployment risk gini
initial.x <-  summary(data.unemp.2.non.demeaned$unempgini)["Min."]
sd.unemp <- sd(data.unemp.2.non.demeaned$unempgini)

# marginal response at time t=0
marginal.0 <- (beta.unemp.2.draws[,4]+2*beta.unemp.2.draws[,6]*initial.x)*sd.unemp

# decay for time t=1
theta <- beta.unemp.2.draws[,2]
marginal.1 <- sd.unemp*(theta*marginal.0 + beta.unemp.2.draws[,5]+beta.unemp.2.draws[,7]*initial.x)

# decay for time=2
gamma <- beta.unemp.2.draws[,3]
marginal.2 <- (theta*(marginal.1) +gamma*marginal.0)*sd.unemp

# decay for time=3
marginal.3 <- (theta*(marginal.2) +gamma*marginal.1)*sd.unemp

# decay for time=4
marginal.4 <- (theta*(marginal.3) +gamma*marginal.2)*sd.unemp

# decay for time=5
marginal.5 <- (theta*(marginal.4) +gamma*marginal.3)*sd.unemp

marginal.effect <- cbind(marginal.0, marginal.1, marginal.2, marginal.3, marginal.4, marginal.5)
plot(x=0:5, apply(marginal.effect, 2, FUN=mean), type="l", col="navy", ylim=c(-.005, .01), xlab="Time", ylab="Marginal Effect on UI Generosity", main="Minimum Equilibrium Value of Unemployment Risk Inequality")
points(x=0:5, apply(marginal.effect, 2, FUN=quantile, .95), type="l", lty=2, col="navy")
points(x=0:5, apply(marginal.effect, 2, FUN=quantile, .05), type="l", lty=2, col="navy")
abline(h=0, lty=2, col="red")


# IRF for 25% percentile equilibrium value of unemployment risk gini

initial.x <-  summary(data.unemp.2.non.demeaned$unempgini)["1st Qu."]
sd.unemp <- sd(data.unemp.2.non.demeaned$unempgini)

# marginal response at time t=0
marginal.0 <- (beta.unemp.2.draws[,4]+2*beta.unemp.2.draws[,6]*initial.x)*sd.unemp

# decay for time t=1
theta <- beta.unemp.2.draws[,2]
marginal.1 <- sd.unemp*(theta*marginal.0 + beta.unemp.2.draws[,5]+beta.unemp.2.draws[,7]*initial.x)
 
# decay for time=2
gamma <- beta.unemp.2.draws[,3]
marginal.2 <- (theta*(marginal.1) +gamma*marginal.0)*sd.unemp

# decay for time=3
marginal.3 <- (theta*(marginal.2) +gamma*marginal.1)*sd.unemp

# decay for time=4
marginal.4 <- (theta*(marginal.3) +gamma*marginal.2)*sd.unemp

# decay for time=5
marginal.5 <- (theta*(marginal.4) +gamma*marginal.3)*sd.unemp

marginal.effect <- cbind(marginal.0, marginal.1, marginal.2, marginal.3, marginal.4, marginal.5)
plot(x=0:5, apply(marginal.effect, 2, FUN=mean), type="l", col="navy", ylim=c(-.005, .01), xlab="Time", ylab="Marginal Effect on UI Generosity", main="1st Quartile Equilibrium Value of Unemployment Risk Inequality")
points(x=0:5, apply(marginal.effect, 2, FUN=quantile, .95), type="l", lty=2, col="navy")
points(x=0:5, apply(marginal.effect, 2, FUN=quantile, .05), type="l", lty=2, col="navy")
abline(h=0, lty=2, col="red")




# IRF for median equilibrium value of unemployment risk gini
initial.x <-  summary(data.unemp.2.non.demeaned$unempgini)["Median"]
sd.unemp <- sd(data.unemp.2.non.demeaned$unempgini)
# marginal response at time t=0
marginal.0 <- (beta.unemp.2.draws[,4]+2*beta.unemp.2.draws[,6]*initial.x)*sd.unemp

# decay for time t=1
theta <- beta.unemp.2.draws[,2]
marginal.1 <- sd.unemp*(theta*marginal.0 + beta.unemp.2.draws[,5]+beta.unemp.2.draws[,7]*initial.x)
 
# decay for time=2
gamma <- beta.unemp.2.draws[,3]
marginal.2 <- (theta*(marginal.1) +gamma*marginal.0)*sd.unemp

# decay for time=3
marginal.3 <- (theta*(marginal.2) +gamma*marginal.1)*sd.unemp

# decay for time=4
marginal.4 <- (theta*(marginal.3) +gamma*marginal.2)*sd.unemp

# decay for time=5
marginal.5 <- (theta*(marginal.4) +gamma*marginal.3)*sd.unemp

marginal.effect <- cbind(marginal.0, marginal.1, marginal.2, marginal.3, marginal.4, marginal.5)
plot(x=0:5, apply(marginal.effect, 2, FUN=mean), type="l", col="navy", ylim=c(-.005, .01), xlab="Time", ylab="Marginal Effect on UI Generosity", main="Median Equilibrium Value of Unemployment Risk Inequality")
points(x=0:5, apply(marginal.effect, 2, FUN=quantile, .95), type="l", lty=2, col="navy")
points(x=0:5, apply(marginal.effect, 2, FUN=quantile, .05), type="l", lty=2, col="navy")
abline(h=0, lty=2, col="red")


# IRF for 75% percentile equilibrium value of unemployment risk gini

initial.x <-  summary(data.unemp.2.non.demeaned$unempgini)["3rd Qu."]
sd.unemp <- sd(data.unemp.2.non.demeaned$unempgini)

# marginal response at time t=0
marginal.0 <- (beta.unemp.2.draws[,4]+2*beta.unemp.2.draws[,6]*initial.x)*sd.unemp

# decay for time t=1
theta <- beta.unemp.2.draws[,2]
marginal.1 <- sd.unemp*(theta*marginal.0 + beta.unemp.2.draws[,5]+beta.unemp.2.draws[,7]*initial.x)
 
# decay for time=2
gamma <- beta.unemp.2.draws[,3]
marginal.2 <- (theta*(marginal.1) +gamma*marginal.0)*sd.unemp

# decay for time=3
marginal.3 <- (theta*(marginal.2) +gamma*marginal.1)*sd.unemp

# decay for time=4
marginal.4 <- (theta*(marginal.3) +gamma*marginal.2)*sd.unemp

# decay for time=5
marginal.5 <- (theta*(marginal.4) +gamma*marginal.3)*sd.unemp

marginal.effect <- cbind(marginal.0, marginal.1, marginal.2, marginal.3, marginal.4, marginal.5)
plot(x=0:5, apply(marginal.effect, 2, FUN=mean), type="l", col="navy", ylim=c(-.005, .01), xlab="Time", ylab="Marginal Effect on UI Generosity", main="3rd Quartile Equilibrium Value of Unemployment Risk Inequality")
points(x=0:5, apply(marginal.effect, 2, FUN=quantile, .95), type="l", lty=2, col="navy")
points(x=0:5, apply(marginal.effect, 2, FUN=quantile, .05), type="l", lty=2, col="navy")
abline(h=0, lty=2, col="red")






######################################################################################
### Family Income Gini as Main Explanatory Variable ###
######################################################################################
# Begin with autoregressive distributed lags (ADL) model (LDV and AR(1) specifications are nested within this more encompassing model)
plm.faminc.1 <- plm(unempben ~ lag(unempben, 1) + lag(faminc_gini, 0:1) + lag(I(faminc_gini^2), 0:1) + lag(stateunemp, 0:1) + lag(blackshare, 0:1) + lag(log(med_wage), 0:1) + lag(unionshare, 0:1) + lag(aupcont, 0:1) + lag(govparty, 0:1) + time, data=macro.panel, method="within")

# Durbin-Watson test for autocorrelation; autocorrelation detected since null rejected #
pdwtest(plm.faminc.1)

# Add another lag of dependent variable, as recommended by Beck and Katz (2011)
# ADLLDV2 model
plm.faminc.2 <- plm(unempben ~ lag(unempben, 1:2) + lag(faminc_gini, 0:1) + lag(I(faminc_gini^2), 0:1) + lag(stateunemp, 0:1) + lag(blackshare, 0:1) + lag(log(med_wage), 0:1) + lag(unionshare, 0:1) + lag(aupcont, 0:1) + lag(govparty, 0:1) + time, data=macro.panel, method="within")
summary(plm.faminc.2)


# Durbin-Watson test for autocorrelation; autocorrelation not detected in residuals since null not rejected #
pdwtest(plm.faminc.2)

# Verify lack of autocorrelation with Breusch-Godfrey/Wooldridge test
pbgtest(plm.faminc.2, order=1)




######################################################################################
### Median Duration of Unemployment Gini as Main Explanatory Variable ###
######################################################################################

# Begin with autoregressive distributed lags (ADL) model
plm.dur.1 <- plm(unempben ~ lag(unempben, 1) + lag(median_dur, 0:1) + lag(I(median_dur^2), 0:1) + lag(stateunemp, 0:1) + lag(blackshare, 0:1) + lag(log(med_wage), 0:1) + lag(unionshare, 0:1) + lag(aupcont, 0:1) + lag(govparty, 0:1) + time, data=macro.panel, method="within")

# test autocorrelation; autocorrelation detected #
pdwtest(plm.dur.1)

# Add another lag of dependent variable, as recommended by Beck and Katz (2011)
# ADLLDV2 model
plm.dur.2 <- plm(unempben ~ lag(unempben, 1:2) + lag(median_dur, 0:1) + lag(I(median_dur^2), 0:1) + lag(stateunemp, 0:1) + lag(blackshare, 0:1) + lag(log(med_wage), 0:1) + lag(unionshare, 0:1) + lag(aupcont, 0:1) + lag(govparty, 0:1) + time, data=macro.panel, method="within")
summary(plm.dur.2)

# test autocorrelation; autocorrelation not detected #
pdwtest(plm.dur.2)

# Verify lack of autocorrelation with Breusch-Godfrey/Wooldridge test
pbgtest(plm.dur.2, order=1)

